{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2023-12-06T01:58:52.524106500Z",
     "start_time": "2023-12-06T01:58:52.518084400Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 导入数据集"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [
    {
     "data": {
      "text/plain": "11914"
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Machine-specific location of the dataset CSV; edit this one constant when running elsewhere.\n",
    "DATA_PATH = 'F:/机器学习数据集/cardataset/data.csv'\n",
    "df = pd.read_csv(DATA_PATH)\n",
    "len(df)  # number of rows loaded"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-06T01:32:56.957376600Z",
     "start_time": "2023-12-06T01:32:56.692914Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "outputs": [
    {
     "data": {
      "text/plain": "Make                  object\nModel                 object\nYear                   int64\nEngine Fuel Type      object\nEngine HP            float64\nEngine Cylinders     float64\nTransmission Type     object\nDriven_Wheels         object\nNumber of Doors      float64\nMarket Category       object\nVehicle Size          object\nVehicle Style         object\nhighway MPG            int64\ncity mpg               int64\nPopularity             int64\nMSRP                   int64\ndtype: object"
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dtypes # 查看数据列的类型"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-06T01:47:58.262468300Z",
     "start_time": "2023-12-06T01:47:58.252467Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "outputs": [
    {
     "data": {
      "text/plain": "  Make       Model  Year             Engine Fuel Type  Engine HP  \\\n0  BMW  1 Series M  2011  premium unleaded (required)      335.0   \n1  BMW    1 Series  2011  premium unleaded (required)      300.0   \n2  BMW    1 Series  2011  premium unleaded (required)      300.0   \n3  BMW    1 Series  2011  premium unleaded (required)      230.0   \n4  BMW    1 Series  2011  premium unleaded (required)      230.0   \n\n   Engine Cylinders Transmission Type     Driven_Wheels  Number of Doors  \\\n0               6.0            MANUAL  rear wheel drive              2.0   \n1               6.0            MANUAL  rear wheel drive              2.0   \n2               6.0            MANUAL  rear wheel drive              2.0   \n3               6.0            MANUAL  rear wheel drive              2.0   \n4               6.0            MANUAL  rear wheel drive              2.0   \n\n                         Market Category Vehicle Size Vehicle Style  \\\n0  Factory Tuner,Luxury,High-Performance      Compact         Coupe   \n1                     Luxury,Performance      Compact   Convertible   \n2                Luxury,High-Performance      Compact         Coupe   \n3                     Luxury,Performance      Compact         Coupe   \n4                                 Luxury      Compact   Convertible   \n\n   highway MPG  city mpg  Popularity   MSRP  \n0           26        19        3916  46135  \n1           28        19        3916  40650  \n2           28        20        3916  36350  \n3           28        18        3916  29450  \n4           28        18        3916  34500  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Make</th>\n      <th>Model</th>\n      <th>Year</th>\n      <th>Engine Fuel Type</th>\n      <th>Engine HP</th>\n      <th>Engine Cylinders</th>\n      <th>Transmission Type</th>\n      <th>Driven_Wheels</th>\n      <th>Number of Doors</th>\n      <th>Market Category</th>\n      <th>Vehicle Size</th>\n      <th>Vehicle Style</th>\n      <th>highway MPG</th>\n      <th>city mpg</th>\n      <th>Popularity</th>\n      <th>MSRP</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>BMW</td>\n      <td>1 Series M</td>\n      <td>2011</td>\n      <td>premium unleaded (required)</td>\n      <td>335.0</td>\n      <td>6.0</td>\n      <td>MANUAL</td>\n      <td>rear wheel drive</td>\n      <td>2.0</td>\n      <td>Factory Tuner,Luxury,High-Performance</td>\n      <td>Compact</td>\n      <td>Coupe</td>\n      <td>26</td>\n      <td>19</td>\n      <td>3916</td>\n      <td>46135</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>BMW</td>\n      <td>1 Series</td>\n      <td>2011</td>\n      <td>premium unleaded (required)</td>\n      <td>300.0</td>\n      <td>6.0</td>\n      <td>MANUAL</td>\n      <td>rear wheel drive</td>\n      <td>2.0</td>\n      <td>Luxury,Performance</td>\n      <td>Compact</td>\n      <td>Convertible</td>\n      <td>28</td>\n      <td>19</td>\n      <td>3916</td>\n      <td>40650</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>BMW</td>\n      <td>1 Series</td>\n      <td>2011</td>\n      <td>premium unleaded (required)</td>\n      <td>300.0</td>\n      <td>6.0</td>\n      <td>MANUAL</td>\n      <td>rear wheel drive</td>\n      <td>2.0</td>\n   
   <td>Luxury,High-Performance</td>\n      <td>Compact</td>\n      <td>Coupe</td>\n      <td>28</td>\n      <td>20</td>\n      <td>3916</td>\n      <td>36350</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>BMW</td>\n      <td>1 Series</td>\n      <td>2011</td>\n      <td>premium unleaded (required)</td>\n      <td>230.0</td>\n      <td>6.0</td>\n      <td>MANUAL</td>\n      <td>rear wheel drive</td>\n      <td>2.0</td>\n      <td>Luxury,Performance</td>\n      <td>Compact</td>\n      <td>Coupe</td>\n      <td>28</td>\n      <td>18</td>\n      <td>3916</td>\n      <td>29450</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>BMW</td>\n      <td>1 Series</td>\n      <td>2011</td>\n      <td>premium unleaded (required)</td>\n      <td>230.0</td>\n      <td>6.0</td>\n      <td>MANUAL</td>\n      <td>rear wheel drive</td>\n      <td>2.0</td>\n      <td>Luxury</td>\n      <td>Compact</td>\n      <td>Convertible</td>\n      <td>28</td>\n      <td>18</td>\n      <td>3916</td>\n      <td>34500</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head() # 查看数据"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-06T01:36:18.372349600Z",
     "start_time": "2023-12-06T01:36:18.353323900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "outputs": [
    {
     "data": {
      "text/plain": "  make       model  year             engine_fuel_type  engine_hp  \\\n0  bmw  1_series_m  2011  premium_unleaded_(required)      335.0   \n1  bmw    1_series  2011  premium_unleaded_(required)      300.0   \n2  bmw    1_series  2011  premium_unleaded_(required)      300.0   \n3  bmw    1_series  2011  premium_unleaded_(required)      230.0   \n4  bmw    1_series  2011  premium_unleaded_(required)      230.0   \n\n   engine_cylinders transmission_type     driven_wheels  number_of_doors  \\\n0               6.0            manual  rear_wheel_drive              2.0   \n1               6.0            manual  rear_wheel_drive              2.0   \n2               6.0            manual  rear_wheel_drive              2.0   \n3               6.0            manual  rear_wheel_drive              2.0   \n4               6.0            manual  rear_wheel_drive              2.0   \n\n                         market_category vehicle_size vehicle_style  \\\n0  factory_tuner,luxury,high-performance      compact         coupe   \n1                     luxury,performance      compact   convertible   \n2                luxury,high-performance      compact         coupe   \n3                     luxury,performance      compact         coupe   \n4                                 luxury      compact   convertible   \n\n   highway_mpg  city_mpg  popularity   msrp  \n0           26        19        3916  46135  \n1           28        19        3916  40650  \n2           28        20        3916  36350  \n3           28        18        3916  29450  \n4           28        18        3916  34500  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>make</th>\n      <th>model</th>\n      <th>year</th>\n      <th>engine_fuel_type</th>\n      <th>engine_hp</th>\n      <th>engine_cylinders</th>\n      <th>transmission_type</th>\n      <th>driven_wheels</th>\n      <th>number_of_doors</th>\n      <th>market_category</th>\n      <th>vehicle_size</th>\n      <th>vehicle_style</th>\n      <th>highway_mpg</th>\n      <th>city_mpg</th>\n      <th>popularity</th>\n      <th>msrp</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>bmw</td>\n      <td>1_series_m</td>\n      <td>2011</td>\n      <td>premium_unleaded_(required)</td>\n      <td>335.0</td>\n      <td>6.0</td>\n      <td>manual</td>\n      <td>rear_wheel_drive</td>\n      <td>2.0</td>\n      <td>factory_tuner,luxury,high-performance</td>\n      <td>compact</td>\n      <td>coupe</td>\n      <td>26</td>\n      <td>19</td>\n      <td>3916</td>\n      <td>46135</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>bmw</td>\n      <td>1_series</td>\n      <td>2011</td>\n      <td>premium_unleaded_(required)</td>\n      <td>300.0</td>\n      <td>6.0</td>\n      <td>manual</td>\n      <td>rear_wheel_drive</td>\n      <td>2.0</td>\n      <td>luxury,performance</td>\n      <td>compact</td>\n      <td>convertible</td>\n      <td>28</td>\n      <td>19</td>\n      <td>3916</td>\n      <td>40650</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>bmw</td>\n      <td>1_series</td>\n      <td>2011</td>\n      <td>premium_unleaded_(required)</td>\n      <td>300.0</td>\n      <td>6.0</td>\n      <td>manual</td>\n      <td>rear_wheel_drive</td>\n      <td>2.0</td>\n   
   <td>luxury,high-performance</td>\n      <td>compact</td>\n      <td>coupe</td>\n      <td>28</td>\n      <td>20</td>\n      <td>3916</td>\n      <td>36350</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>bmw</td>\n      <td>1_series</td>\n      <td>2011</td>\n      <td>premium_unleaded_(required)</td>\n      <td>230.0</td>\n      <td>6.0</td>\n      <td>manual</td>\n      <td>rear_wheel_drive</td>\n      <td>2.0</td>\n      <td>luxury,performance</td>\n      <td>compact</td>\n      <td>coupe</td>\n      <td>28</td>\n      <td>18</td>\n      <td>3916</td>\n      <td>29450</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>bmw</td>\n      <td>1_series</td>\n      <td>2011</td>\n      <td>premium_unleaded_(required)</td>\n      <td>230.0</td>\n      <td>6.0</td>\n      <td>manual</td>\n      <td>rear_wheel_drive</td>\n      <td>2.0</td>\n      <td>luxury</td>\n      <td>compact</td>\n      <td>convertible</td>\n      <td>28</td>\n      <td>18</td>\n      <td>3916</td>\n      <td>34500</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Normalise column names: lower-case, spaces -> underscores.\n",
    "df.columns = df.columns.str.lower().str.replace(' ', '_')\n",
    "\n",
    "# Names of the string-valued (object dtype) columns.\n",
    "str_cols = df.select_dtypes(include='object').columns.tolist()\n",
    "\n",
    "# Apply the same normalisation to the values of every string column.\n",
    "for col in str_cols:\n",
    "    lowered = df[col].str.lower()\n",
    "    df[col] = lowered.str.replace(' ', '_')\n",
    "df.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-06T01:49:48.025126700Z",
     "start_time": "2023-12-06T01:49:47.931090200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "outputs": [
    {
     "data": {
      "text/plain": "make                    0\nmodel                   0\nyear                    0\nengine_fuel_type        3\nengine_hp              69\nengine_cylinders       30\ntransmission_type       0\ndriven_wheels           0\nnumber_of_doors         6\nmarket_category      3742\nvehicle_size            0\nvehicle_style           0\nhighway_mpg             0\ncity_mpg                0\npopularity              0\nmsrp                    0\ndtype: int64"
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.isnull().sum() # 统计缺失值"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-06T01:50:23.202345900Z",
     "start_time": "2023-12-06T01:50:23.141814Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "outputs": [
    {
     "data": {
      "text/plain": "make                 0\nmodel                0\nyear                 0\nengine_fuel_type     0\nengine_hp            0\nengine_cylinders     0\ntransmission_type    0\ndriven_wheels        0\nnumber_of_doors      0\nmarket_category      0\nvehicle_size         0\nvehicle_style        0\nhighway_mpg          0\ncity_mpg             0\npopularity           0\nmsrp                 0\ndtype: int64"
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Replace every missing value with 0 and rebind df to the filled frame.\n",
    "# NOTE(review): this also writes the integer 0 into the string columns that had gaps\n",
    "# (engine_fuel_type, market_category) -- confirm that is intended.\n",
    "df = df.fillna(value=0)\n",
    "df.isna().sum()  # per-column count of remaining missing values"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-06T01:56:19.845783Z",
     "start_time": "2023-12-06T01:56:19.811764900Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 探索性数据分析"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "outputs": [
    {
     "data": {
      "text/plain": "<Figure size 432x288 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAEDCAYAAAA849PJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAQsElEQVR4nO3db4xcV3nH8e/O+O/C2kPLmrQSIoWWR27V0BIaKLETiwZCAjQVVfoiAkJJ02C5CmqRUpI4lShGAURD66KEdmmUhBRVxQgVAk6QaBvZ25YImkqOSJ7IUQGpFZIJrL2uYzteT1/MrBiW3flz18zs+nw/r2bOOXfvOcdX85t7z9zrsWaziSSpPLVRd0CSNBoGgCQVygCQpEIZAJJUKANAkgq1ZtQd6NfZs2ebc3PVfrFUr49RdduSOE/9cZ56c476M4x5Wru2/n1gcrG6VRMAc3NNZmZOVNq20RivvG1JnKf+OE+9OUf9GcY8TU5OfGepOi8BSVKhDABJKpQBIEmFMgAkqVAGgCQVygCQpEIZAJJUKANAkgplAEhSoVbNncDL9cJNG9m4vvdwnzt1huPHnhtCjyRptIoJgI3r13DhB77cs923P/IWjg+hP5I0al4CkqRCGQCSVCgDQJIKZQBIUqEMAEkqlAEgSYUyACSpUAaAJBXKAJCkQhkAklQoA0CSCmUASFKh+noYXES8FvhoZu6IiF8E7gOawBPArsw8GxE3AjcBZ4A9mflQRGwEHgS2ALPA9Zl5JCJeB/xVu+1XM/OD53pgkqTuep4BRMQtwKeBDe2iu4DdmbkdGAOuiYgLgJuBS4ErgTsjYj2wEzjUbvsAsLv9Nz4FXAdsA14bEa8+d0OSJPWjnzOAZ4C3A59pv78YeLT9ej/wJmAOmM7MU8CpiDgMXETrA/5jHW3viIhNwPrMfAYgIh4Bfgv4z26dqNfHaDTG+x3Xgm0Hu9JVdT+rXb1eK3bsg3CeenOO+jPqeeoZAJn5+Yi4sKNoLDOb7dezwGZgE3C0o81i5Z1lxxa0fXmvfszNNZmZOdGr2aIajXFqtXrf7avuZ7VrNMaLHfsgnKfenKP+DGOeJicnlqyrsgh8tuP1BDBD6wN9okd5r7aSpCGqEgCPR8SO9uurgAPAY8D2iNgQEZuBrbQWiKeBqzvbZuYx4HREvCIixmitGRxYxhgkSRVU+S8h3w9MRcQ64ElgX2bORcReWh/kNeD2zDwZEfcA90fEQeA0rYVfgPcCfw/Uaf0K6OvLHYgkaTBjzWazd6sV4Pnn55rLWQNYu7be9/8JfOTIbKX9rHZet+2P89Sbc9SfIa0BfBN4zWJ13ggmSYUyACSpUAaAJBXKAJCkQhkAklQoA0CSCmUASFKhDABJKpQBIEmFMgAkqVAGgCQVygCQpEIZAJJUKANAkgplAEhSoQwASSqUASBJhTIAJKlQBoAkFcoAkKRCGQCSVCgDQJIKZQBIUqEMAEkqlAEgSYUyACSpUAaAJBXKAJCkQhkAklQoA0CSCrWmykYRsRa4H7gQmANuBM4A9wFN4AlgV2aejYgbgZva9Xsy86GI2Ag8CGwBZoHrM/PI8oYiSRpE1TOAq4E1mfl64M+BDwN3AbszczswBlwTERcANwOXAlcCd0bEemAncKjd9gFg9/KGIUkaVKUzAOBpYE1E1IBNwPPA64BH2/X7gTfROjuYzsxTwKmIOAxcBGwDPtbR9o5eO6zXx2g0xit1tl4fLOeq7me1q9drxY59EM5Tb85Rf0Y9T1UD4Dityz9PAS8G3gpclpnNdv0ssJlWOBzt2G6x8vmyrubmmszMnKjU2UZjnFqt3nf7qvtZ7RqN8WLHPgjnqTfnqD/DmKfJyYkl66peAvpj4JHMfCXwKlrrAes66ieAGeBY+3W38vkySdIQVQ2AH/Kjb/A/ANYCj0fEjnbZVcAB4DFge0RsiIjNwFZaC8TTtNYR
OttKkoao6iWgTwD3RsQBWt/8bwO+AUxFxDrgSWBfZs5FxF5aH/A14PbMPBkR9wD3R8RB4DRw3XIHIkkaTKUAyMzjwO8tUnX5Im2ngKkFZSeAa6vsW5J0bngjmCQVygCQpEIZAJJUKANAkgplAEhSoQwASSqUASBJhTIAJKlQBoAkFcoAkKRCGQCSVCgDQJIKZQBIUqEMAEkqlAEgSYUyACSpUAaAJBXKAJCkQhkAklQoA0CSCmUASFKhDABJKpQBIEmFMgAkqVAGgCQVygCQpEIZAJJUKANAkgplAEhSoQwASSrUmqobRsStwG8D64C7gUeB+4Am8ASwKzPPRsSNwE3AGWBPZj4UERuBB4EtwCxwfWYeWc5AJEmDqXQGEBE7gNcDlwKXAy8F7gJ2Z+Z2YAy4JiIuAG5ut7sSuDMi1gM7gUPttg8Au5c5DknSgKpeAroSOAR8AfgS8BBwMa2zAID9wBXAJcB0Zp7KzKPAYeAiYBvw8IK2kqQhqnoJ6MXAy4C3Ar8AfBGoZWazXT8LbAY2AUc7tlusfL6sq3p9jEZjvFJn6/XBcq7qfla7er1W7NgH4Tz15hz1Z9TzVDUAngWeyszTQEbESVqXgeZNADPAsfbrbuXzZV3NzTWZmTlRqbONxji1Wr3v9lX3s9o1GuPFjn0QzlNvzlF/hjFPk5MTS9ZVvQR0EHhzRIxFxM8DLwC+1l4bALgKOAA8BmyPiA0RsRnYSmuBeBq4ekFbSdIQVToDaP+S5zJaH/A1YBfw38BURKwDngT2ZeZcROyl9QFfA27PzJMRcQ9wf0QcBE4D152DsUiSBlD5Z6CZecsixZcv0m4KmFpQdgK4tuq+JUnL541gklQoA0CSCmUASFKhDABJKpQBIEmFMgAkqVAGgCQVygCQpEIZAJJUKANAkgplAEhSoQwASSqUASBJhTIAJKlQBoAkFcoAkKRCGQCSVCgDQJIKZQBIUqEMAEkqlAEgSYUyACSpUAaAJBXKAJCkQhkAklQoA0CSCmUASFKhDABJKpQBIEmFMgAkqVBrlrNxRGwBvgm8ETgD3Ac0gSeAXZl5NiJuBG5q1+/JzIciYiPwILAFmAWuz8wjy+mLJGkwlc8AImIt8DfAc+2iu4DdmbkdGAOuiYgLgJuBS4ErgTsjYj2wEzjUbvsAsLv6ECRJVSznEtDHgU8B/9t+fzHwaPv1fuAK4BJgOjNPZeZR4DBwEbANeHhBW0nSEFW6BBQR7waOZOYjEXFru3gsM5vt17PAZmATcLRj08XK58u6qtfHaDTGq3SXen2wnKu6n9WuXq8VO/ZBOE+9OUf9GfU8VV0DeA/QjIgrgF+jdRlnS0f9BDADHGu/7lY+X9bV3FyTmZkTlTrbaIxTq9X7bl91P6tdozFe7NgH4Tz15hz1ZxjzNDk5sWRdpUtAmXlZZl6emTuA/wLeBeyPiB3tJlcBB4DHgO0RsSEiNgNbaS0QTwNXL2grSRqic/kz0PcDH4yIfwfWAfsy83vAXlof8P8M3J6ZJ4F7gF+JiIPAHwIfPIf9kCT1YVk/AwVonwXMu3yR+ilgakHZCeDa5e5bklSdN4JJUqEMAEkqlAEgSYUyACSpUAaAJBXKAJCkQhkAklQoA0CSCmUASFKhDABJKpQBIEmFMgAkqVAGgCQVygCQpEIZAJJUKANAkgplAEhSoQwASSqUASBJhTIAJKlQBoAkFcoAkKRCGQCSVCgDQJIKZQBIUqEMAEkqlAEgSYUyACSpUAaAJBXKAJCkQq2pslFErAXuBS4E1gN7gG8B9wFN4AlgV2aejYgbgZuAM8CezHwoIjYCDwJbgFng+sw8sryhSJIGUfUM4B3As5m5HbgK+CRwF7C7XTYGXBMRFwA3A5cCVwJ3RsR6YCdwqN32AWD38oYhSRpU1QD4HHBHx/szwMXAo+33+4ErgEuA6cw8lZlHgcPARcA24OEFbSVJQ1TpElBmHgeIiAlgH61v8B/PzGa7ySywGdgEHO3Y
dLHy+bKu6vUxGo3xKt2lXh8s56ruZ7Wr12vFjn0QzlNvzlF/Rj1PlQIAICJeCnwBuDszPxsRH+uongBmgGPt193K58u6mptrMjNzolJfG41xarV63+2r7me1azTGix37IJyn3pyj/gxjniYnJ5asq3QJKCJeAnwV+NPMvLdd/HhE7Gi/vgo4ADwGbI+IDRGxGdhKa4F4Grh6QVtJ0hBVPQO4DXgRcEdEzK8FvA/YGxHrgCeBfZk5FxF7aX3A14DbM/NkRNwD3B8RB4HTwHXLGoUkaWBV1wDeR+sDf6HLF2k7BUwtKDsBXFtl35Kkc8MbwSSpUAaAJBXKAJCkQhkAklQoA0CSCmUASFKhDABJKpQBIEmFMgAkqVAGgCQVygCQpEIZAJJUKANAkgplAEhSoQwASSqUASBJhTIAJKlQBoAkFcoAkKRCGQCSVCgDQJIKtWbUHVhpTj4/x+TkRM92z506w/Fjzw2hR5L002EALLBhbZ0LP/Dlnu2+/ZG3cHwI/ZGknxYvAUlSoQwASSqUASBJhTIAJKlQBoAkFcoAkKRC+TPQirxfQNJqN7IAiIgacDfwKuAU8AeZeXhU/RlUv/cLPPWhNxsUklakUZ4B/A6wITN/MyJeB/wFcM0I+/NTYVBIWqlGGQDbgIcBMvM/IuI1I+zLyJ3roDj5/Bwb1tb72rehIpVprNlsjmTHEfFp4POZub/9/rvAyzPzzBKbHAG+M6z+SdJ54mXA5GIVozwDOAZ0fpWtdfnwhyUGIEmqZpQ/A50GrgZorwEcGmFfJKk4ozwD+ALwxoj4N2AM+P0R9kWSijOyNQBJ0mh5J7AkFcoAkKRCGQCSVKjz6llAvR4vERFvA/4MOAPcm5lTI+noCPUxR38C3EDrvguAmzIzh97RFSIiXgt8NDN3LCgv/ljq1GWeij+eImItcC9wIbAe2JOZX+yoH9mxdF4FAF0eL9H+R/gE8BvA/wHTEfGlzPzeyHo7Gr0ewfFq4F2Z+c2R9G4FiYhbgHfSOl46yz2WOiw1T20eT/AO4NnMfGdE/CzwOPBFGP2xdL5dAvqxx0sAnY+X2AoczswfZuZp4CCwffhdHLlucwRwMXBrRByMiFuH3bkV5hng7YuUeyz9uKXmCTyeAD4H3NHxvvOG15EeS+dbAGwCjna8n4uINUvUzQKbh9WxFaTbHAH8A/Be4A3Atoh46zA7t5Jk5ueB5xep8ljq0GWewOOJzDyembMRMQHsA3Z3VI/0WDrfAqDb4yUW1k0AM8Pq2Aqy5BxFxBjwl5n5/fa3kS8Dvz6CPq50Hkt98Hj6kYh4KfAvwGcy87MdVSM9ls63NYBp4G3APy7yeIkngV+KiJ8BjgOXAR8ffhdHrtscbQKeiIittK5HvoHW4pV+nMdSfzyegIh4CfBV4I8y82sLqkd6LJ1vAfATj5eIiOuAF2bm37Z/kfAIrTOfezPzf0bY11HpNUe30fqmcgr4WmZ+ZYR9XVE8lvrj8fQTbgNeBNwREfNrAVPAC0Z9LPkoCEkq1Pm2BiBJ6pMBIEmFMgAkqVAGgCQV6nz7FZAknZeWet7SgjbvBnYCdeCfMvND3f6mZwCStMK1n7f0aWBDlzavoPXhvwO4BFjXftbQkjwDkKSVb/55S58BiIhfBfbSupfnWeA9wBXAN4D7gZ8DPpyZSz2iA/AMQJJWvEWetzQF7GpfDvoKcAvwYlp3Et8A/C7w1xHR6PZ3PQOQpNVnK3B3RACsBZ4Gvg78a2bOArMR8S3glcBjS/0RA0CSVp+k9f8sfDciLqV1ySeBXRGxgdYi8C8Dh7v8DQNAklahncADEVFvv78hM5+OiL+j9cDHMeBDmfmDbn/EZwFJUqFcBJakQhkAklQoA0CSCmUASFKhDABJKpQBIEmFMgAkqVD/Dwjv7QEQoF8mAAAAAElFTkSuQmCC\n"
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Histogram of the raw msrp values, with labelled axes so the figure stands alone.\n",
    "fig, ax = plt.subplots()\n",
    "ax.hist(df['msrp'], bins=30)\n",
    "ax.set(xlabel='msrp', ylabel='count', title='Distribution of msrp')\n",
    "plt.show()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-06T01:59:06.047075300Z",
     "start_time": "2023-12-06T01:59:05.756074400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "outputs": [
    {
     "data": {
      "text/plain": "<Figure size 432x288 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAD3CAYAAAAe5+9lAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAVMElEQVR4nO3df4zc9X3n8efu2uyPsvamF7uoElf3rnfvupHcqERAMQZXJXGAqpyQaKMoDf3lUETrJMqJ/LC5Hj1zhDSlja8XUi2HgNDqdIVGaly5pBcCst3cualSlVzJOzIqyR/XVC7J+keNFzze++M7G+bM7szszOyPmc/z8dfMZz7j7+e9M37Ndz7f73y+Q3Nzc0iSBt/wag9AkrQyDHxJKoSBL0mFMPAlqRAGviQVYt1qD2AhFy5cmKvVmp89NDIyRKs+g8i6y2LdZem27vXrR/4J2LTY42sy8Gu1OWZmzjbtMzU10bLPILLuslh3Wbqte9OmyW82e9wpHUkqhIEvSYUw8CWpEAa+JBXCwJekQhj4klQIA1+SCmHgS1Ih2vrhVURcBTyQmTsjYjMwDbwJGAHem5kvRsRu4A7gPLA/Mw9GxDjwBLAZOA3cnpknlqMQSVJzLQM/Iu4GfgH453rTJ4A/zMz/ERE/BfxoRPwzsAd4GzAGHImIvwDuBJ7PzP8YEe8C9gHvX4Y6vufSDeOMj7b+HHtl9jxnTr2ynEORpDWlnT38F4Fbgc/W728H/jYi/ifwElWA/zRwNDNngdmIOA5sA66l+oAAOATc07uhL2x8dB1bPvJnLfu99PGbObPcg5GkNaRl4GfmUxGxpaFpC/DdzLwhIv4D8GHgG8DJhj6ngY3Ahob2+baWRkaGmJqaaNFnuGWfVrp9/mroRd39yLrLYt3Lo5PF014G/rR++/PAfcBXgMmGPpPADHCqoX2+raVuFk/btGlygd4L68fFmVxUqizWXZYeLJ7W9PFOztI5AtxUv30d8H+AY8COiBiLiI3AVuBrwNGGvjcChzvYniSpBzoJ/A8B742IvwTeCfznzPw2cIAq0J8B9mbmOeAh4C0RcQR4H3Bvb4YtSVqqtqZ0MvMl4Or67W8Cb1+gzzTV6ZqNbWeB27oepSSpa/7wSpIKYeBLUiEMfEkqhIEvSYUw8CWpEAa+JBXCwJekQhj4klQIA1+SCmHgS1IhDHxJKoSBL0mFMPAlqRAGviQVwsCXpEIY+JJUCANfkgph4EtSIdq6xGFEXAU8kJk7G9reDfxGZv5k/f5u4A7gPLA/Mw9GxDjwBLAZOA3cnpkneluCJKkdLffwI+Ju4GFgrKHtrcCvAEP1+5cBe4DtwC7g/ogYBe4Ens/MHcDjwL5eFyBJak87e/gvArcCnwWIiH8BfBz4AK9ftPxK4GhmzgKzEXEc2AZcC3yi3ucQcE87gxoZGWJqaqJFn+GWfVrp9vmroRd19yPrLot1L4+WgZ+ZT0XEFoCIGAH+G/BB4JWGbhuAkw33TwMbL2qfb2upVptjZuZs0z5TUxML9tm0abKdTQC03MZatFjdg866y2LdnWmVf23N4Te4Avg3wENUUzw/FhG/BzwDNG5pEpgBTjW0z7dJklbBkgI/M48BbwGo7/X/98z8QH0O/76IGANGga3A14CjwE3AMeBG4HDvhi5JWoqenJaZmd8GDlAF+jPA3sw8R/VN4C0RcQR4H3BvL7YnSVq6tvbwM/Ml4OpmbZk5zesHcefbzgK3dTtISVL3/OGVJBXCwJekQhj4klQIA1+SCmHgS1IhDHxJKoSBL0mFMPAlqRAGviQVwsCXpEIY+JJUCANfkgph4EtSIQx8SSqEgS9JhTDwJakQBr4kFaKtK15FxFXAA5m5MyLeCvwXoAbMAu/NzH+MiN3AHcB5YH9mHoyIceAJYDNwGrg9M08sRyGSpOZa7uFHxN3Aw8BYvelTwG9k5k7gT4AP1y9i
vgfYDuwC7o+IUeBO4PnM3AE8DuzreQWSpLa0M6XzInBrw/13Zebf1G+vA84BVwJHM3M2M08Cx4FtwLXAn9f7HgJu6MmoJUlL1nJKJzOfiogtDff/ASAirgF+HbiOaq/+ZMPTTgMbgQ0N7fNtLY2MDDE1NdGiz3DLPq10+/zV0Iu6+5F1l8W6l0dbc/gXi4ifB/YCN2fmiYg4BUw2dJkEZoDG9vm2lmq1OWZmzjbtMzU1sWCfTZsmF+i9sFbbWIsWq3vQWXdZrLszrfJvyYEfEe+hOji7MzO/U28+BtwXEWPAKLAV+BpwFLip/viNwOGlbk+S1BtLCvyIGAEOAN8C/iQiAJ7LzN+MiANUgT4M7M3McxHxEPBYRBwBXgXe3dPRS5La1lbgZ+ZLwNX1u9+/SJ9pYPqitrPAbV2MTz106YZxxkdbv+SvzJ7nzKlXVmBEklZSR3P46k/jo+vY8pE/a9nvpY/fzJkVGI+kleUvbSWpEAa+JBXCwJekQhj4klQIA1+SCmHgS1IhDHxJKoSBL0mFMPAlqRAGviQVwsCXpEK4ls4AaHdRNEllMyUGwFIWRZNULqd0JKkQ7uHrDc69VmvrUpGumy/1FwNfbzC2fsR186UB5JSOJBWirT38iLgKeCAzd0bEjwCPAnNUFyq/KzMvRMRuqoubnwf2Z+bBiBgHngA2A6eB2zPzxDLUIUlqoeUefkTcDTwMjNWbHgT2ZeYOYAi4JSIuA/YA24FdwP0RMQrcCTxf7/s4sK/3JUiS2tHOHv6LwK3AZ+v3rwCeq98+BLwDqAFHM3MWmI2I48A24FrgEw1972lnUCMjQ0xNTbToM9yyTyvdPn819KLuXlqpsay1uleKdZdluetuGfiZ+VREbGloGsrMufrt08BGYANwsqHPQu3zbS3VanPMzJxt2mdqamLBPu2cXTKv1TbWooXqXkrNvbZSf8PFXu9BZ91l6bbuVlnQyUHbCw23J4EZ4FT9drP2+TZJ0iroJPC/GhE767dvBA4Dx4AdETEWERuBrVQHdI8CN13UV5K0CjoJ/A8B90bEl4FLgCcz89vAAapAfwbYm5nngIeAt0TEEeB9wL29GbYkaanaOi0zM18Crq7f/gZw/QJ9poHpi9rOArd1PUpJUtf84ZUkFcLAl6RCGPiSVAgXT1PHXFVT6i8GvjrmqppSf3FKR5IKYeBLUiEMfEkqhIEvSYUw8CWpEAa+JBXCwJekQhj4klQIA1+SCmHgS1IhDHxJKoSBL0mF6GjxtIhYDzwGbAFqwG7gPPAoMEd1Pdu7MvNCROwG7qg/vj8zD3Y/bEnSUnW6h38TsC4zrwF+C7gPeBDYl5k7gCHgloi4DNgDbAd2AfdHxGj3w5YkLVWngf8NYF1EDAMbgNeAK4Dn6o8fAm4ArgSOZuZsZp4EjgPbuhuyJKkTna6Hf4ZqOufrwJuBnwGuy8y5+uOngY1UHwYnG543397UyMgQU1MTLfoMt+zTSrfPXw29qHs1dDvmfq27W9ZdluWuu9PA/yDwdGZ+NCIuB54BLml4fBKYAU7Vb1/c3lStNsfMzNmmfaamJhbs084VmOa12sZatFDdS6l5NZx7rcbY+pG2+i52dazFXu9BZ91l6bbuVlnQaeB/l2oaB+A7wHrgqxGxMzOfBW4EvgQcA+6LiDFgFNhKdUBXBWn3yljg1bGk5dRp4P8u8EhEHKbas/8Y8BVgOiIuAV4AnszMWkQcAA5THS/Ym5nnejBuSdISdRT4mXkG+LkFHrp+gb7TwHQn25Ek9Y4XMV/DLt0wzvjoG1+itT5nL2ltMvDXsPHRdW3Nfb/08ZtXYDSS+p1LK0hSIQx8SSqEgS9JhTDwJakQBr4kFcLAl6RCGPiSVAgDX5IKYeBLUiEMfEkqhIEvSYUw8CWpEAa+JBXCwJekQhj4klQIA1+SCtHxBVAi4qPAz1Jd0/bTwHPAo8Ac1YXK78rMCxGxG7gDOA/sz8yD3Q5ag+vca7VFr+jV2P7K
7HnOnHplpYYlDYSOAj8idgLXANuBCeDfAw8C+zLz2Yj4DHBLRHwZ2AO8DRgDjkTEX2TmbC8Gr8Eztn6k7at8nVmB8UiDpNMpnV3A88DngM8DB4ErqPbyAQ4BNwBXAkczczYzTwLHgW1djViS1JFOp3TeDPwQ8DPADwN/Cgxn5lz98dPARmADcLLhefPtTY2MDDE1NdGiz3DLPq10+3ytrhJev168z/uRdS+PTgP/ZeDrmfkqkBFxDri84fFJYAY4Vb99cXtTtdocMzNnm/aZmppYsM9i878LabWN1baUWkq01l+/XljsfT7orLszrTKj0ymdI8A7I2IoIn4Q+D7gi/W5fYAbgcPAMWBHRIxFxEZgK9UBXUnSCutoDz8zD0bEdVSBPgzcBfw9MB0RlwAvAE9mZi0iDlCF/zCwNzPP9WbokqSl6Pi0zMy8e4Hm6xfoNw1Md7odSVJv+MMrSSqEgS9JhTDwJakQHc/hS6up2RIMjVyCQXqdga++5BIM0tI5pSNJhXAPfxVcumGc8VH/9JJWlqmzCsZH17U9HSFJveKUjiQVwsCXpEIY+JJUCANfkgph4EtSIQx8SSqEgS9JhTDwJakQBr4kFcLAl6RCdLW0QkRsBv4aeDtwHngUmKO6UPldmXkhInYDd9Qf35+ZB7sasSSpIx3v4UfEeuAPgPnFxh8E9mXmDmAIuCUiLgP2ANuBXcD9ETHa3ZAlSZ3oZg//k8BngI/W718BPFe/fQh4B1ADjmbmLDAbEceBbcBfNfuHR0aGmJqaaLrxkZHhln1a6fb56g/9/Dr34n3ej6x7eXQU+BHxi8CJzHw6IuYDfygz5+q3TwMbgQ3AyYanzrc3VavNMTNztmmfqamJBfu0cxWkea22sVyWMkZ1b7Ve515Y7H0+6Ky7M62ypdM9/F8G5iLiBuCtwOPA5obHJ4EZ4FT99sXt0orwUojS6zoK/My8bv52RDwL/Brw2xGxMzOfBW4EvgQcA+6LiDFgFNhKdUBXWhFeClF6XS8vgPIhYDoiLgFeAJ7MzFpEHAAOUx0g3puZ53q4TUlSm7oO/Mzc2XD3+gUenwamu92OJKk7/vBKkgph4EtSIQx8SSqEgS9JhejlWTpS3/J8fZXAwJfwfH2VwSkdSSqEgS9JhTDwJakQBr4kFcLAl6RCGPiSVAgDX5IKYeBLUiEMfEkqhIEvSYVwaQVpCVxzR/2so8CPiPXAI8AWqmvV7gf+DngUmKO6bu1dmXkhInYDdwDngf2ZebD7YUurwzV31M86ndJ5D/ByZu6gumD57wMPAvvqbUPALRFxGbAH2A7sAu6PiNHuhy1JWqpOp3T+GHiy4f554Argufr9Q8A7gBpwNDNngdmIOA5sA/6qw+1KkjrUUeBn5hmAiJikCv59wCczc67e5TSwEdgAnGx46nx7UyMjQ0xNTbToM9yyTyvdPl9qphfvr168z/uRdS+Pjg/aRsTlwOeAT2fmH0XEJxoengRmgFP12xe3N1WrzTEzc7Zpn6mpiQX7tHNAbV6rbSyXpYxR/asX76/F3ueDzro70ypbOj1o+wPAF4Bfz8wv1pu/GhE7M/NZqnn9LwHHgPsiYozq4O5WqgO6A+fSDeOMj3rSk6S1q9OE+hjwJuCeiLin3vZ+4EBEXAK8ADyZmbWIOAAcpjpAvDczz3U76LVofHRdW2dvQHUGhySttE7n8N9PFfAXu36BvtPAdCfbkfqV5+trLXIOQloGnq+vtcjAl/pEu8eJ/NagxRj4Up9o9ziR3xq0GBdPk6RCGPiSVAindKRV1M7ZPP5QT71i4EurqN2zeaD93294SqgWY+BLA8ZTQrUY5/AlqRAGviQVwsCXpEI4hy8VyoO75THwpUJ5cLc8Br6knnCtn7XPwJfUE+2u9fP1//TOtqaSJqcmGFs/0ta2/RBpj4Evqal25/rbtZSppHZ/lNbuh8i512ptfYgM6geIgS+pqaUE9Grp9YfIoH6AFBv4nqEgaTGr+QGy
nJY98CNiGPg08OPALPCrmXl8ubfbSrsvaLsvlCQtZikfIKeXcRwrsYf/74CxzPzJiLga+B3glhXYbk/0w9dZSWrHSvzS9lrgzwEy838Bb1uBbUqSLjI0Nze3rBuIiIeBpzLzUP3+t4B/lZnnmzztBPDNZR2YJA2eHwI2LfbgSkzpnAIaJ8GHW4Q9NBmwJKkzKzGlcxS4CaA+h//8CmxTknSRldjD/xzw9oj4S2AI+KUV2KYk6SLLPocvSVobXA9fkgph4EtSIQx8SSpEX62ls1aXaehERKwHHgG2AKPAfuDvgEeBOeBrwF2ZeSEidgN3AOeB/Zl5MCLGgSeAzcBp4PbMPFE/E+pT9b5fyMx7V7SwNkXEZuCvgbdTjfVRBrzuiPgo8LPAJVTv4+coo+71wGNU7/UasJsBf80j4irggczcGRE/wjLVGhG/Cdxcb/9AZh5rNq5+28P/3jINwEeolmnoV+8BXs7MHcCNwO8DDwL76m1DwC0RcRmwB9gO7ALuj4hR4E7g+Xrfx4F99X/3M8C7qX7hfFVE/MQK1tSWegD8ATC/Kt3A1x0RO4FrqOq5HricAuquuwlYl5nXAL8F3McA1x4RdwMPA2P1pmWptV7v9cBVwLuA/9pqbP0W+IO0TMMfA/c03D8PXEG11wdwCLgBuBI4mpmzmXkSOA5so+FvMd83IjYAo5n5YmbOAU8DP73slSzdJ6newP+3fr+EundR/Qblc8DngYOUUTfAN4B19W/oG4DXGOzaXwRubbi/XLVeS7W3P5eZ36L6Gzf90Wq/Bf4G4GTD/VpE9NW01LzMPJOZpyNiEniS6pN8qP6CQvV1biNvrHmh9sa2Uwv0XTMi4heBE5n5dEPzwNcNvJlqB+U24NeAP6T61fmg1w1whmo65+vANHCAAX7NM/Mpqg+1ectV62L/xqL6LfA7WaZhzYqIy4EvAZ/NzD8CLjQ8PAnM8MaaF2pv1Xct+WWqH+I9C7yV6mvr5obHB7Xul4GnM/PVzEzgHP//f85BrRvgg1S1/1uq42+PUR3HmDfItcPy/b9e8t+g3wJ/YJZpiIgfAL4AfDgzH6k3f7U+1wvVvP5h4BiwIyLGImIjsJXqwM/3/hbzfTPzFPBqRPzriBiimkY4vCIFtSkzr8vM6zNzJ/A3wHuBQ4NeN3AEeGdEDEXEDwLfB3yxgLoBvsvre6LfAdZTwHu9wXLVehTYFRHDEfEvqXaA/6nZQPptOmSQlmn4GPAm4J6ImJ/Lfz9wICIuAV4AnszMWkQcoHqBh4G9mXkuIh4CHouII8CrVAd04PXpghGq+b3/vXIldexDwPQg110/A+M6qv/ow8BdwN8z4HXX/S7wSEQcptqz/xjwFcqoHZbx/V3/m36Z199TTbm0giQVot+mdCRJHTLwJakQBr4kFcLAl6RCGPiSVAgDX5IKYeBLUiH+HzErWW10SJZcAAAAAElFTkSuQmCC\n"
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Same histogram, zoomed in on the rows whose msrp is below 100000.\n",
    "msrp_below_cutoff = df.loc[df['msrp'] < 100000, 'msrp']\n",
    "fig, ax = plt.subplots()\n",
    "ax.hist(msrp_below_cutoff, bins=30)\n",
    "ax.set(xlabel='msrp', ylabel='count', title='Distribution of msrp (msrp < 100000)')\n",
    "plt.show()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-06T02:00:12.859298600Z",
     "start_time": "2023-12-06T02:00:12.613298200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "outputs": [
    {
     "data": {
      "text/plain": "<Figure size 432x288 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD3CAYAAAAT+Z8iAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAPrElEQVR4nO3db4xc1XnH8e96195dC9tbNaaoVVUrCn0EkUgVKBD+CEtNE8BNaalQEUqLmsRBiAQSpaIWGKVpoBgUSOOgENWEmgRVjQpBSkEORG1CsJOKikIFDTxgWidvmoqkXeyN7QWvty/mWl0vszuz4525s2e/nzfcOXPW97nDvb89PnPu9cD09DSSpHKtqLsASVJ3GfSSVDiDXpIKZ9BLUuEMekkq3FDdBTRz9OjR6amp+lYDDQ4OUOf+F8p6u8t6u8t6F8f09DSrVg09Dlw8+72+DPqpqWnGxw/Wtv+xsdW17n+hrLe7rLe7rHfxrF+/5m3N2p26kaTCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwvXlnbFavk5aO8rocOvT8tDkESb2H+pBRdLSZ9Crr4wOD7Fhy2Mt++3btomJHtQjlcCpG0kqnEEvSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSrcvP/CVESsBO4HNgDDwK3AD4GdwDTwAnBdZh6NiM3ANcAR4NbMfDQiRoEHgZOBA8DVmfladw5FktRMqxH9B4GfZeaFwCXAPcDdwNaqbQC4LCJOAa4HzgfeD9weEcPAtcDzVd+vAlu7cxiSpLm0Cvq/B26Z8foIcCbwZPV6F/Be4GxgT2ZOZubrwF7gDOAC4Fuz+kqSemjeqZvMnACIiDXAQzRG5J/LzOmqywFgHbAWeH3GjzZrP9bW0uDgAGNjq9s8hMU3OLii1v0v1HKtt1fHvFw/316x3u6bN+gBIuJXgUeAL2Xm30bEnTPeXgOMA/ur7fnaj7W1NDU1zfj4wXa6dsXY2Opa979QJdW7fv2apu3N9OqYS/p8+5H1Lp65rp9WX8b+EvAE8LHM/Meq+dmI2JiZ36Uxb/8d4GngtogYofGl7Wk0vqjdA1xavX8J8NQJH4kEHH5zqq1fCocmjzCx/1APKpL6V6sR/U3ALwC3RMSxufobgO0RsQp4EXgoM6ciYjuNIF8B3JyZhyPiXuCBiNgNvAFc1ZWj0LIzsnKQDVsea9lv37ZNTPSgHqmftZqjv4FGsM92UZO+O4Ads9oOAlecSIGSpBPjDVOSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkq3FDdBah8J60dZXT4+FNt/fo1NVUjLT8GvbpudHiIDVsea6vvvm2bulyNtPw4dSNJhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqXFs3TEXEOcAdmbkxIt4N/APwSvX2vZn59YjYDFwDHAFuzcxHI2IUeBA4GTgAXJ2Zry36UUiS5tQy6CPiRuCPgJ9XTe8G7s7Mu2b0OQW4HjgLGAF2R8S3gWuB5zPzzyPiSmArcMPiHoIkaT7tjOhfBS4Hvla9PhOIiLiMxqj+E8DZwJ7MnAQmI2IvcAZwAXBn9XO7gFsWsXZJUhtaBn1mPhwRG2Y0PQ3cl5nPRMTNwKeB54DXZ/Q5AKwD1s5oP9bW0uDgAGNjq9vp2hWDgytq3f9CLbV6e+1EP5ul9vlab3cttXqhs4eaPZKZ48e2gS8C3wNmPo5wDTAO7J/RfqytpampacbH
D3ZQ2uIYG1td6/4Xqt/rrftJlSf62fT75zub9XZXP9c717XWyaqbxyPi7Gr7t4BnaIzyL4yIkYhYB5wGvADsAS6t+l4CPNXB/iRJJ6CTEf21wD0R8QbwE+Cjmbk/IrbTCPIVwM2ZeTgi7gUeiIjdwBvAVYtVuCSpPW0FfWbuA86ttv8VOK9Jnx3AjlltB4ErTrhKSVLHvGFKkgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCdXLDlLRkHH5zqq1HMByaPMLE/kM9qEjqPYNeRRtZOciGLY+17Ldv2yYmelCPVAenbiSpcAa9JBXOoJekwhn0klQ4g16SClfcqpuT1o4yOtz6sFxOJ2m5KC7oR4eHXE4nSTM4dSNJhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Yba6RQR5wB3ZObGiHgHsBOYBl4ArsvMoxGxGbgGOALcmpmPRsQo8CBwMnAAuDozX+vCcUiS5tByRB8RNwL3ASNV093A1sy8EBgALouIU4DrgfOB9wO3R8QwcC3wfNX3q8DWxT8ESdJ82hnRvwpcDnyten0m8GS1vQt4HzAF7MnMSWAyIvYCZwAXAHfO6HtLO0UNDg4wNra6rQM4EXPtY3BwRU/2v1iWWr39yvOhHtbbfS2DPjMfjogNM5oGMnO62j4ArAPWAq/P6NOs/VhbS1NT04yPH2yn61usX7+m7b5z7WNsbHXH+69Dv9e7kP8ndfJ8qIf1Lp65rrVOvow9OmN7DTAO7K+252s/1iZJ6qFOgv7ZiNhYbV8CPAU8DVwYESMRsQ44jcYXtXuAS2f1lST1UCdB/yngMxHxA2AV8FBm/gTYTiPI/wm4OTMPA/cC74yI3cBHgc8sTtmSpHa1tbwyM/cB51bbLwMXNemzA9gxq+0gcMUJVylJ6pg3TElS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwhn0klS4tp5HLzVz0tpRRoc9haR+51Wqjo0OD7Fhy2Mt++3btqkH1Uiai1M3klQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXAur5SAw29OsX79mjnfP/beockjTOw/1KuypEVh0EvAyMrBtu8JmOhBPdJicupGkgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcB0/vTIingVer17+J3AbsBOYBl4ArsvMoxGxGbgGOALcmpmPnlDFkqQF6SjoI2IEIDM3zmj7JrA1M78bEV8GLouIHwDXA2cBI8DuiPh2Zk6ecOWSpLZ0OqJ/F7A6Ip6o/oybgDOBJ6v3dwHvA6aAPVWwT0bEXuAM4F/m+8MHBwcYG1vdYWntm2sfg4MrerL/xbLU6l3q+v2zXmrng/V2X6dBfxD4HHAfcCqNYB/IzOnq/QPAOmAt/z+9M7N9XlNT04yPH+yosPn+laDZ5trH2Njqjvdfh7rqXchnXZJ+Pzc8f7urn+ud65rsNOhfBvZWwf5yRPyMxoj+mDXAOLC/2p7dLknqkU5X3XwIuAsgIn6Zxsj9iYjYWL1/CfAU8DRwYUSMRMQ64DQaX9RKknqk0xH9V4CdEbGbxiqbDwE/BXZExCrgReChzJyKiO00Qn8FcHNmHl6EuiVJbeoo6DPzDeCqJm9d1KTvDmBHJ/uRJJ04b5iSpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCdfwPj0jL0eE3p9p6auehySNM7D/Ug4qk1gx6aQFGVg6yYctjLfvt27aJiR7UI7XDqRtJKpwjer3FSWtHGR321JBK4dWstxgdHmp7ekJS/3PqRpIK
Z9BLUuEMekkqnHP0Uhe43l79xKCXusD19uonTt1IUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwi3b5ZWt1jkfe891zpKWumUb9K5zlrRcOHUjSYVbtiN6qR+0+6gEcBpRnTPopRq1O4UITiOqcwa9tES0O/r3gWqazaBvwYtG/WIhCwja6ffSZy/23F4mDPoWXJ2jUnluLx8GvaR5+bfapc+glzQvR/5Ln+voJalwBr0kFc6pm0WyFG58OWntKKPD/i+XlpuuX/URsQL4EvAuYBL4SGbu7fZ+e20p3PgyOjzU9lyrtFALWec/snLwuLZmP+eXu4unF8O73wNGMvM9EXEucBdwWQ/227dcxaASLfY6f7/cXTy9CPoLgG8BZOY/R8RZPdhnX2v3gmj3hpaFTBtJS0VJ06F1D9oGpqenu7qDiLgPeDgzd1Wvfwy8PTOPzPNjrwE/6mphklSWn1b/vXj2G70Y0e8HZv5aXtEi5AHWd7EeSVpWerG8cg9wKUA1R/98D/YpSar0YkT/CPDbEfF9YAD4kx7sU5JU6focvSSpXt4ZK0mFM+glqXAGvSQVzgefzBARK4EHgA3AFLA5M1+qtah5RMQw8DfA22ksY70uM1+pt6rmIuIc4I7M3BgR7wB2AtPACzTqPlpnfbPNrLd6/fvAFZl5Va2FNTHrs/0N4Is0zt9J4I8z879rLXCWWfWeDvw1jYUa/wZ8PDOnai1wltnnQtV2FY1a31NbYQvgiP54lwJDmXke8BfAbTXX08pmYCIzzwU+DtxTcz1NRcSNwH3ASNV0N7A1My+kcYH31SMxZtcbEV8AbqcPr5cmn+0XaATQRuAbwJ/VVFpTTer9S+CmzDwfWA38bl21NdOkXqpfph+mce4uCX134tbsZWCoehDbWuDNmutp5XRgF0BmJnBaveXM6VXg8hmvzwSerLZ3Ae/teUXzm13v94Fra6qlldm1XpmZz1XbQ8Dh3pc0r9n1/kFmfi8iVgGnAH31tw9m1RsRvwhsAz5RW0UdMOiPN0Fj2uYlYAewvdZqWnsO+J2IGKhuRvuViBhs9UO9lpkPc/wvzYHMPLau9wCwrvdVzW12vZn5dRrTTH2nSa3/BRAR5wEfAz5fU2lNNal3KiJ+Dfh34G1A1lVbMzPrra6trwCfpHHeLhkG/fE+CTyemb9O47HKD0TESIufqdP9NObmvwN8AHim3+Y35zBzPn4NMF5XISWKiD8EvgxsyszX6q6nlcz8UWaeSqPmu+uuZx5nAqcC9wJ/B5weEX9Vb0ntMeiP97/A69X2/wArgb4bIc/wm8Duaj72EeA/6i2nbc9GxMZq+xLgqRprKUpEfJDGSH5jZvb9+RAR34yIU6uXBzh+ENBXMvPpzHxndb1dCfwwM5fEFI6rbo73eeD+iHgKWEXjS6Kf11zTfF4BPhsRf0pjVPzhmutp16eAHdW87IvAQzXXU4RqamE78GPgGxEB8GRmfrrWwua3DdgZEW8AB4GP1FxPkXwEgiQVzqkbSSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIK938nkZF15sPjgAAAAABJRU5ErkJggg==\n"
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "log_price = np.log1p(df.msrp)\n",
    "plt.hist(log_price,bins=30)\n",
    "plt.show()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-06T02:01:09.671996100Z",
     "start_time": "2023-12-06T02:01:09.432982700Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 划分训练集和测试集"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "outputs": [
    {
     "data": {
      "text/plain": "0    10.739349\n1    10.612779\n2    10.500977\n3    10.290483\n4    10.448744\nName: msrp, dtype: float64"
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y = np.log1p(df.msrp)\n",
    "del df['msrp']\n",
    "y.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-06T02:04:00.549564Z",
     "start_time": "2023-12-06T02:04:00.535532200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "outputs": [
    {
     "data": {
      "text/plain": "0    46135.0\n1    40650.0\n2    36350.0\n3    29450.0\n4    34500.0\nName: msrp, dtype: float64"
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_org = np.expm1(y)\n",
    "y_org.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-06T02:05:24.878856600Z",
     "start_time": "2023-12-06T02:05:24.851855500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "outputs": [
    {
     "data": {
      "text/plain": "  make       model  year             engine_fuel_type  engine_hp  \\\n0  bmw  1_series_m  2011  premium_unleaded_(required)      335.0   \n1  bmw    1_series  2011  premium_unleaded_(required)      300.0   \n2  bmw    1_series  2011  premium_unleaded_(required)      300.0   \n3  bmw    1_series  2011  premium_unleaded_(required)      230.0   \n4  bmw    1_series  2011  premium_unleaded_(required)      230.0   \n\n   engine_cylinders transmission_type     driven_wheels  number_of_doors  \\\n0               6.0            manual  rear_wheel_drive              2.0   \n1               6.0            manual  rear_wheel_drive              2.0   \n2               6.0            manual  rear_wheel_drive              2.0   \n3               6.0            manual  rear_wheel_drive              2.0   \n4               6.0            manual  rear_wheel_drive              2.0   \n\n                         market_category vehicle_size vehicle_style  \\\n0  factory_tuner,luxury,high-performance      compact         coupe   \n1                     luxury,performance      compact   convertible   \n2                luxury,high-performance      compact         coupe   \n3                     luxury,performance      compact         coupe   \n4                                 luxury      compact   convertible   \n\n   highway_mpg  city_mpg  popularity  \n0           26        19        3916  \n1           28        19        3916  \n2           28        20        3916  \n3           28        18        3916  \n4           28        18        3916  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>make</th>\n      <th>model</th>\n      <th>year</th>\n      <th>engine_fuel_type</th>\n      <th>engine_hp</th>\n      <th>engine_cylinders</th>\n      <th>transmission_type</th>\n      <th>driven_wheels</th>\n      <th>number_of_doors</th>\n      <th>market_category</th>\n      <th>vehicle_size</th>\n      <th>vehicle_style</th>\n      <th>highway_mpg</th>\n      <th>city_mpg</th>\n      <th>popularity</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>bmw</td>\n      <td>1_series_m</td>\n      <td>2011</td>\n      <td>premium_unleaded_(required)</td>\n      <td>335.0</td>\n      <td>6.0</td>\n      <td>manual</td>\n      <td>rear_wheel_drive</td>\n      <td>2.0</td>\n      <td>factory_tuner,luxury,high-performance</td>\n      <td>compact</td>\n      <td>coupe</td>\n      <td>26</td>\n      <td>19</td>\n      <td>3916</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>bmw</td>\n      <td>1_series</td>\n      <td>2011</td>\n      <td>premium_unleaded_(required)</td>\n      <td>300.0</td>\n      <td>6.0</td>\n      <td>manual</td>\n      <td>rear_wheel_drive</td>\n      <td>2.0</td>\n      <td>luxury,performance</td>\n      <td>compact</td>\n      <td>convertible</td>\n      <td>28</td>\n      <td>19</td>\n      <td>3916</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>bmw</td>\n      <td>1_series</td>\n      <td>2011</td>\n      <td>premium_unleaded_(required)</td>\n      <td>300.0</td>\n      <td>6.0</td>\n      <td>manual</td>\n      <td>rear_wheel_drive</td>\n      <td>2.0</td>\n      <td>luxury,high-performance</td>\n      <td>compact</td>\n    
  <td>coupe</td>\n      <td>28</td>\n      <td>20</td>\n      <td>3916</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>bmw</td>\n      <td>1_series</td>\n      <td>2011</td>\n      <td>premium_unleaded_(required)</td>\n      <td>230.0</td>\n      <td>6.0</td>\n      <td>manual</td>\n      <td>rear_wheel_drive</td>\n      <td>2.0</td>\n      <td>luxury,performance</td>\n      <td>compact</td>\n      <td>coupe</td>\n      <td>28</td>\n      <td>18</td>\n      <td>3916</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>bmw</td>\n      <td>1_series</td>\n      <td>2011</td>\n      <td>premium_unleaded_(required)</td>\n      <td>230.0</td>\n      <td>6.0</td>\n      <td>manual</td>\n      <td>rear_wheel_drive</td>\n      <td>2.0</td>\n      <td>luxury</td>\n      <td>compact</td>\n      <td>convertible</td>\n      <td>28</td>\n      <td>18</td>\n      <td>3916</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-06T02:06:02.038780500Z",
     "start_time": "2023-12-06T02:06:02.008719400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "outputs": [
    {
     "data": {
      "text/plain": "array([  19.,    0.,    0., ...,    0.,    0., 2011.])"
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.feature_extraction import DictVectorizer\n",
    "df_dict = df.to_dict(orient='records')\n",
    "dv = DictVectorizer(sparse=False) # sparse=False意味着创建的矩阵不是稀疏的,而是一个numpy数组\n",
    "dv.fit(df_dict)\n",
    "X=dv.transform(df_dict)\n",
    "X[0]"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-06T02:09:00.020519500Z",
     "start_time": "2023-12-06T02:08:58.710480100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "outputs": [
    {
     "data": {
      "text/plain": "['city_mpg',\n 'driven_wheels=all_wheel_drive',\n 'driven_wheels=four_wheel_drive',\n 'driven_wheels=front_wheel_drive',\n 'driven_wheels=rear_wheel_drive',\n 'engine_cylinders',\n 'engine_fuel_type',\n 'engine_fuel_type=diesel',\n 'engine_fuel_type=electric',\n 'engine_fuel_type=flex-fuel_(premium_unleaded_recommended/e85)',\n 'engine_fuel_type=flex-fuel_(premium_unleaded_required/e85)',\n 'engine_fuel_type=flex-fuel_(unleaded/e85)',\n 'engine_fuel_type=flex-fuel_(unleaded/natural_gas)',\n 'engine_fuel_type=natural_gas',\n 'engine_fuel_type=premium_unleaded_(recommended)',\n 'engine_fuel_type=premium_unleaded_(required)',\n 'engine_fuel_type=regular_unleaded',\n 'engine_hp',\n 'highway_mpg',\n 'make=acura',\n 'make=alfa_romeo',\n 'make=aston_martin',\n 'make=audi',\n 'make=bentley',\n 'make=bmw',\n 'make=bugatti',\n 'make=buick',\n 'make=cadillac',\n 'make=chevrolet',\n 'make=chrysler',\n 'make=dodge',\n 'make=ferrari',\n 'make=fiat',\n 'make=ford',\n 'make=genesis',\n 'make=gmc',\n 'make=honda',\n 'make=hummer',\n 'make=hyundai',\n 'make=infiniti',\n 'make=kia',\n 'make=lamborghini',\n 'make=land_rover',\n 'make=lexus',\n 'make=lincoln',\n 'make=lotus',\n 'make=maserati',\n 'make=maybach',\n 'make=mazda',\n 'make=mclaren',\n 'make=mercedes-benz',\n 'make=mitsubishi',\n 'make=nissan',\n 'make=oldsmobile',\n 'make=plymouth',\n 'make=pontiac',\n 'make=porsche',\n 'make=rolls-royce',\n 'make=saab',\n 'make=scion',\n 'make=spyker',\n 'make=subaru',\n 'make=suzuki',\n 'make=tesla',\n 'make=toyota',\n 'make=volkswagen',\n 'make=volvo',\n 'market_category',\n 'market_category=crossover',\n 'market_category=crossover,diesel',\n 'market_category=crossover,exotic,luxury,high-performance',\n 'market_category=crossover,exotic,luxury,performance',\n 'market_category=crossover,factory_tuner,luxury,high-performance',\n 'market_category=crossover,factory_tuner,luxury,performance',\n 'market_category=crossover,factory_tuner,performance',\n 
'market_category=crossover,flex_fuel',\n 'market_category=crossover,flex_fuel,luxury',\n 'market_category=crossover,flex_fuel,luxury,performance',\n 'market_category=crossover,flex_fuel,performance',\n 'market_category=crossover,hatchback',\n 'market_category=crossover,hatchback,factory_tuner,performance',\n 'market_category=crossover,hatchback,luxury',\n 'market_category=crossover,hatchback,performance',\n 'market_category=crossover,hybrid',\n 'market_category=crossover,luxury',\n 'market_category=crossover,luxury,diesel',\n 'market_category=crossover,luxury,high-performance',\n 'market_category=crossover,luxury,hybrid',\n 'market_category=crossover,luxury,performance',\n 'market_category=crossover,luxury,performance,hybrid',\n 'market_category=crossover,performance',\n 'market_category=diesel',\n 'market_category=diesel,luxury',\n 'market_category=exotic,factory_tuner,high-performance',\n 'market_category=exotic,factory_tuner,luxury,high-performance',\n 'market_category=exotic,factory_tuner,luxury,performance',\n 'market_category=exotic,flex_fuel,factory_tuner,luxury,high-performance',\n 'market_category=exotic,flex_fuel,luxury,high-performance',\n 'market_category=exotic,high-performance',\n 'market_category=exotic,luxury',\n 'market_category=exotic,luxury,high-performance',\n 'market_category=exotic,luxury,high-performance,hybrid',\n 'market_category=exotic,luxury,performance',\n 'market_category=exotic,performance',\n 'market_category=factory_tuner,high-performance',\n 'market_category=factory_tuner,luxury',\n 'market_category=factory_tuner,luxury,high-performance',\n 'market_category=factory_tuner,luxury,performance',\n 'market_category=factory_tuner,performance',\n 'market_category=flex_fuel',\n 'market_category=flex_fuel,diesel',\n 'market_category=flex_fuel,factory_tuner,luxury,high-performance',\n 'market_category=flex_fuel,hybrid',\n 'market_category=flex_fuel,luxury',\n 'market_category=flex_fuel,luxury,high-performance',\n 
'market_category=flex_fuel,luxury,performance',\n 'market_category=flex_fuel,performance',\n 'market_category=flex_fuel,performance,hybrid',\n 'market_category=hatchback',\n 'market_category=hatchback,diesel',\n 'market_category=hatchback,factory_tuner,high-performance',\n 'market_category=hatchback,factory_tuner,luxury,performance',\n 'market_category=hatchback,factory_tuner,performance',\n 'market_category=hatchback,flex_fuel',\n 'market_category=hatchback,hybrid',\n 'market_category=hatchback,luxury',\n 'market_category=hatchback,luxury,hybrid',\n 'market_category=hatchback,luxury,performance',\n 'market_category=hatchback,performance',\n 'market_category=high-performance',\n 'market_category=hybrid',\n 'market_category=luxury',\n 'market_category=luxury,high-performance',\n 'market_category=luxury,high-performance,hybrid',\n 'market_category=luxury,hybrid',\n 'market_category=luxury,performance',\n 'market_category=luxury,performance,hybrid',\n 'market_category=performance',\n 'market_category=performance,hybrid',\n 'model=100',\n 'model=124_spider',\n 'model=190-class',\n 'model=1_series',\n 'model=1_series_m',\n 'model=2',\n 'model=200',\n 'model=200sx',\n 'model=240',\n 'model=240sx',\n 'model=2_series',\n 'model=3',\n 'model=300',\n 'model=300-class',\n 'model=3000gt',\n 'model=300m',\n 'model=300zx',\n 'model=323',\n 'model=350-class',\n 'model=350z',\n 'model=360',\n 'model=370z',\n 'model=3_series',\n 'model=3_series_gran_turismo',\n 'model=400-class',\n 'model=420-class',\n 'model=456m',\n 'model=458_italia',\n 'model=4_series',\n 'model=4_series_gran_coupe',\n 'model=4c',\n 'model=4runner',\n 'model=5',\n 'model=500',\n 'model=500-class',\n 'model=500e',\n 'model=500l',\n 'model=500x',\n 'model=550',\n 'model=560-class',\n 'model=57',\n 'model=570s',\n 'model=575m',\n 'model=599',\n 'model=5_series',\n 'model=5_series_gran_turismo',\n 'model=6',\n 'model=600-class',\n 'model=6000',\n 'model=612_scaglietti',\n 'model=62',\n 'model=626',\n 
'model=650s_coupe',\n 'model=650s_spider',\n 'model=6_series',\n 'model=6_series_gran_coupe',\n 'model=718_cayman',\n 'model=740',\n 'model=760',\n 'model=780',\n 'model=7_series',\n 'model=80',\n 'model=850',\n 'model=86',\n 'model=8_series',\n 'model=9-2x',\n 'model=9-3',\n 'model=9-3_griffin',\n 'model=9-4x',\n 'model=9-5',\n 'model=9-7x',\n 'model=90',\n 'model=900',\n 'model=9000',\n 'model=911',\n 'model=928',\n 'model=929',\n 'model=940',\n 'model=944',\n 'model=960',\n 'model=968',\n 'model=a3',\n 'model=a4',\n 'model=a4_allroad',\n 'model=a5',\n 'model=a6',\n 'model=a7',\n 'model=a8',\n 'model=acadia',\n 'model=acadia_limited',\n 'model=accent',\n 'model=acclaim',\n 'model=accord',\n 'model=accord_crosstour',\n 'model=accord_hybrid',\n 'model=accord_plug-in_hybrid',\n 'model=achieva',\n 'model=activehybrid_5',\n 'model=activehybrid_7',\n 'model=activehybrid_x6',\n 'model=aerio',\n 'model=aerostar',\n 'model=alero',\n 'model=allante',\n 'model=allroad',\n 'model=allroad_quattro',\n 'model=alpina',\n 'model=alpina_b6_gran_coupe',\n 'model=alpina_b7',\n 'model=altima',\n 'model=altima_hybrid',\n 'model=amanti',\n 'model=amg_gt',\n 'model=armada',\n 'model=arnage',\n 'model=aspen',\n 'model=aspire',\n 'model=astro',\n 'model=astro_cargo',\n 'model=ats',\n 'model=ats-v',\n 'model=ats_coupe',\n 'model=aurora',\n 'model=avalanche',\n 'model=avalon',\n 'model=avalon_hybrid',\n 'model=avenger',\n 'model=aventador',\n 'model=aveo',\n 'model=aviator',\n 'model=axxess',\n 'model=azera',\n 'model=aztek',\n 'model=azure',\n 'model=azure_t',\n 'model=b-class_electric_drive',\n 'model=b-series',\n 'model=b-series_pickup',\n 'model=b-series_truck',\n 'model=b9_tribeca',\n 'model=baja',\n 'model=beetle',\n 'model=beetle_convertible',\n 'model=beretta',\n 'model=black_diamond_avalanche',\n 'model=blackwood',\n 'model=blazer',\n 'model=bolt_ev',\n 'model=bonneville',\n 'model=borrego',\n 'model=boxster',\n 'model=bravada',\n 'model=breeze',\n 'model=bronco',\n 
'model=bronco_ii',\n 'model=brooklands',\n 'model=brougham',\n 'model=brz',\n 'model=c-class',\n 'model=c-max_hybrid',\n 'model=c/k_1500_series',\n 'model=c/k_2500_series',\n 'model=c30',\n 'model=c36_amg',\n 'model=c43_amg',\n 'model=c70',\n 'model=c8',\n 'model=cabrio',\n 'model=cabriolet',\n 'model=cadenza',\n 'model=caliber',\n 'model=california',\n 'model=california_t',\n 'model=camaro',\n 'model=camry',\n 'model=camry_hybrid',\n 'model=camry_solara',\n 'model=canyon',\n 'model=caprice',\n 'model=captiva_sport',\n 'model=caravan',\n 'model=carrera_gt',\n 'model=cascada',\n 'model=catera',\n 'model=cavalier',\n 'model=cayenne',\n 'model=cayman',\n 'model=cayman_s',\n 'model=cc',\n 'model=celebrity',\n 'model=celica',\n 'model=century',\n 'model=challenger',\n 'model=charger',\n 'model=chevy_van',\n 'model=ciera',\n 'model=cirrus',\n 'model=city_express',\n 'model=civic',\n 'model=civic_crx',\n 'model=civic_del_sol',\n 'model=cl',\n 'model=cl-class',\n 'model=cla-class',\n 'model=classic',\n 'model=clk-class',\n 'model=cls-class',\n 'model=cobalt',\n 'model=colorado',\n 'model=colt',\n 'model=concorde',\n 'model=continental',\n 'model=continental_flying_spur',\n 'model=continental_flying_spur_speed',\n 'model=continental_gt',\n 'model=continental_gt3-r',\n 'model=continental_gt_speed',\n 'model=continental_gt_speed_convertible',\n 'model=continental_gtc',\n 'model=continental_gtc_speed',\n 'model=continental_supersports',\n 'model=continental_supersports_convertible',\n 'model=contour',\n 'model=contour_svt',\n 'model=corniche',\n 'model=corolla',\n 'model=corolla_im',\n 'model=corrado',\n 'model=corsica',\n 'model=corvette',\n 'model=corvette_stingray',\n 'model=coupe',\n 'model=cr-v',\n 'model=cr-z',\n 'model=cressida',\n 'model=crossfire',\n 'model=crosstour',\n 'model=crosstrek',\n 'model=crown_victoria',\n 'model=cruze',\n 'model=cruze_limited',\n 'model=ct6',\n 'model=ct_200h',\n 'model=cts',\n 'model=cts-v',\n 'model=cts-v_coupe',\n 'model=cts-v_wagon',\n 
'model=cts_coupe',\n 'model=cts_wagon',\n 'model=cube',\n 'model=custom_cruiser',\n 'model=cutlass',\n 'model=cutlass_calais',\n 'model=cutlass_ciera',\n 'model=cutlass_supreme',\n 'model=cx-3',\n 'model=cx-5',\n 'model=cx-7',\n 'model=cx-9',\n 'model=dakota',\n 'model=dart',\n 'model=dawn',\n 'model=daytona',\n 'model=db7',\n 'model=db9',\n 'model=db9_gt',\n 'model=dbs',\n 'model=defender',\n 'model=deville',\n 'model=diablo',\n 'model=diamante',\n 'model=discovery',\n 'model=discovery_series_ii',\n 'model=discovery_sport',\n 'model=dts',\n 'model=durango',\n 'model=dynasty',\n 'model=e-150',\n 'model=e-250',\n 'model=e-class',\n 'model=e-golf',\n 'model=e-series_van',\n 'model=e-series_wagon',\n 'model=e55_amg',\n 'model=echo',\n 'model=eclipse',\n 'model=eclipse_spyder',\n 'model=edge',\n 'model=eighty-eight',\n 'model=eighty-eight_royale',\n 'model=elantra',\n 'model=elantra_coupe',\n 'model=elantra_gt',\n 'model=elantra_touring',\n 'model=eldorado',\n 'model=electra',\n 'model=element',\n 'model=elise',\n 'model=enclave',\n 'model=encore',\n 'model=endeavor',\n 'model=entourage',\n 'model=envision',\n 'model=envoy',\n 'model=envoy_xl',\n 'model=envoy_xuv',\n 'model=enzo',\n 'model=eos',\n 'model=equator',\n 'model=equinox',\n 'model=equus',\n 'model=es_250',\n 'model=es_300',\n 'model=es_300h',\n 'model=es_330',\n 'model=es_350',\n 'model=escalade',\n 'model=escalade_esv',\n 'model=escalade_ext',\n 'model=escalade_hybrid',\n 'model=escape',\n 'model=escape_hybrid',\n 'model=escort',\n 'model=esprit',\n 'model=estate_wagon',\n 'model=esteem',\n 'model=eurovan',\n 'model=evora',\n 'model=evora_400',\n 'model=ex',\n 'model=ex35',\n 'model=excel',\n 'model=exige',\n 'model=expedition',\n 'model=explorer',\n 'model=explorer_sport',\n 'model=explorer_sport_trac',\n 'model=expo',\n 'model=express',\n 'model=express_cargo',\n 'model=f-150',\n 'model=f-150_heritage',\n 'model=f-150_svt_lightning',\n 'model=f-250',\n 'model=f12_berlinetta',\n 'model=f430',\n 
'model=festiva',\n 'model=ff',\n 'model=fiesta',\n 'model=firebird',\n 'model=fit',\n 'model=fit_ev',\n 'model=five_hundred',\n 'model=fj_cruiser',\n 'model=fleetwood',\n 'model=flex',\n 'model=flying_spur',\n 'model=focus',\n 'model=focus_rs',\n 'model=focus_st',\n 'model=forenza',\n 'model=forester',\n 'model=forte',\n 'model=fox',\n 'model=fr-s',\n 'model=freelander',\n 'model=freestar',\n 'model=freestyle',\n 'model=frontier',\n 'model=fusion',\n 'model=fusion_hybrid',\n 'model=fx',\n 'model=fx35',\n 'model=fx45',\n 'model=fx50',\n 'model=g-class',\n 'model=g20',\n 'model=g3',\n 'model=g35',\n 'model=g37',\n 'model=g37_convertible',\n 'model=g37_coupe',\n 'model=g37_sedan',\n 'model=g5',\n 'model=g6',\n 'model=g8',\n 'model=g80',\n 'model=g_convertible',\n 'model=g_coupe',\n 'model=g_sedan',\n 'model=galant',\n 'model=gallardo',\n 'model=genesis',\n 'model=genesis_coupe',\n 'model=ghibli',\n 'model=ghost',\n 'model=ghost_series_ii',\n 'model=gl-class',\n 'model=gla-class',\n 'model=glc-class',\n 'model=gle-class',\n 'model=gle-class_coupe',\n 'model=gli',\n 'model=glk-class',\n 'model=gls-class',\n 'model=golf',\n 'model=golf_alltrack',\n 'model=golf_gti',\n 'model=golf_r',\n 'model=golf_sportwagen',\n 'model=grand_am',\n 'model=grand_caravan',\n 'model=grand_prix',\n 'model=grand_vitara',\n 'model=grand_voyager',\n 'model=gransport',\n 'model=granturismo',\n 'model=granturismo_convertible',\n 'model=gs_200t',\n 'model=gs_300',\n 'model=gs_350',\n 'model=gs_400',\n 'model=gs_430',\n 'model=gs_450h',\n 'model=gs_460',\n 'model=gs_f',\n 'model=gt',\n 'model=gt-r',\n 'model=gti',\n 'model=gto',\n 'model=gx_460',\n 'model=gx_470',\n 'model=h3',\n 'model=h3t',\n 'model=hhr',\n 'model=highlander',\n 'model=highlander_hybrid',\n 'model=horizon',\n 'model=hr-v',\n 'model=hs_250h',\n 'model=huracan',\n 'model=i-miev',\n 'model=i3',\n 'model=i30',\n 'model=i35',\n 'model=ia',\n 'model=ilx',\n 'model=ilx_hybrid',\n 'model=im',\n 'model=impala',\n 'model=impala_limited',\n 
'model=imperial',\n 'model=impreza',\n 'model=impreza_wrx',\n 'model=insight',\n 'model=integra',\n 'model=intrepid',\n 'model=intrigue',\n 'model=iq',\n 'model=is_200t',\n 'model=is_250',\n 'model=is_250_c',\n 'model=is_300',\n 'model=is_350',\n 'model=is_350_c',\n 'model=is_f',\n 'model=j30',\n 'model=jetta',\n 'model=jetta_gli',\n 'model=jetta_hybrid',\n 'model=jetta_sportwagen',\n 'model=jimmy',\n 'model=journey',\n 'model=juke',\n 'model=justy',\n 'model=jx',\n 'model=k900',\n 'model=kizashi',\n 'model=lacrosse',\n 'model=lancer',\n 'model=lancer_evolution',\n 'model=lancer_sportback',\n 'model=land_cruiser',\n 'model=landaulet',\n 'model=laser',\n 'model=le_baron',\n 'model=le_mans',\n 'model=leaf',\n 'model=legacy',\n 'model=legend',\n 'model=lesabre',\n 'model=levante',\n 'model=lfa',\n 'model=lhs',\n 'model=loyale',\n 'model=lr2',\n 'model=lr3',\n 'model=lr4',\n 'model=ls',\n 'model=ls_400',\n 'model=ls_430',\n 'model=ls_460',\n 'model=ls_600h_l',\n 'model=lss',\n 'model=ltd_crown_victoria',\n 'model=lucerne',\n 'model=lumina',\n 'model=lumina_minivan',\n 'model=lx_450',\n 'model=lx_470',\n 'model=lx_570',\n 'model=m',\n 'model=m-class',\n 'model=m2',\n 'model=m3',\n 'model=m30',\n 'model=m35',\n 'model=m37',\n 'model=m4',\n 'model=m45',\n 'model=m4_gts',\n 'model=m5',\n 'model=m56',\n 'model=m6',\n 'model=m6_gran_coupe',\n 'model=macan',\n 'model=magnum',\n 'model=malibu',\n 'model=malibu_classic',\n 'model=malibu_hybrid',\n 'model=malibu_limited',\n 'model=malibu_maxx',\n 'model=mark_lt',\n 'model=mark_vii',\n 'model=mark_viii',\n 'model=matrix',\n 'model=maxima',\n 'model=maybach',\n 'model=mazdaspeed_3',\n 'model=mazdaspeed_6',\n 'model=mazdaspeed_mx-5_miata',\n 'model=mazdaspeed_protege',\n 'model=mdx',\n 'model=metris',\n 'model=metro',\n 'model=mighty_max_pickup',\n 'model=millenia',\n 'model=mirage',\n 'model=mirage_g4',\n 'model=mkc',\n 'model=mks',\n 'model=mkt',\n 'model=mkx',\n 'model=mkz',\n 'model=mkz_hybrid',\n 'model=ml55_amg',\n 
'model=model_s',\n 'model=monaco',\n 'model=montana',\n 'model=montana_sv6',\n 'model=monte_carlo',\n 'model=montero',\n 'model=montero_sport',\n 'model=mp4-12c',\n 'model=mpv',\n 'model=mr2',\n 'model=mr2_spyder',\n 'model=mulsanne',\n 'model=murano',\n 'model=murano_crosscabriolet',\n 'model=murcielago',\n 'model=mustang',\n 'model=mustang_svt_cobra',\n 'model=mx-3',\n 'model=mx-5_miata',\n 'model=mx-6',\n 'model=navajo',\n 'model=navigator',\n 'model=neon',\n 'model=new_beetle',\n 'model=new_yorker',\n 'model=ninety-eight',\n 'model=nitro',\n 'model=nsx',\n 'model=nv200',\n 'model=nx',\n 'model=nx_200t',\n 'model=nx_300h',\n 'model=odyssey',\n 'model=omni',\n 'model=optima',\n 'model=optima_hybrid',\n 'model=outback',\n 'model=outlander',\n 'model=outlander_sport',\n 'model=pacifica',\n 'model=panamera',\n 'model=park_avenue',\n 'model=park_ward',\n 'model=paseo',\n 'model=passat',\n 'model=passport',\n 'model=pathfinder',\n 'model=phaeton',\n 'model=phantom',\n 'model=phantom_coupe',\n 'model=phantom_drophead_coupe',\n 'model=pickup',\n 'model=pilot',\n 'model=precis',\n 'model=prelude',\n 'model=previa',\n 'model=prius',\n 'model=prius_c',\n 'model=prius_prime',\n 'model=prius_v',\n 'model=prizm',\n 'model=probe',\n 'model=protege',\n 'model=protege5',\n 'model=prowler',\n 'model=pt_cruiser',\n 'model=pulsar',\n 'model=q3',\n 'model=q40',\n 'model=q45',\n 'model=q5',\n 'model=q50',\n 'model=q60_convertible',\n 'model=q60_coupe',\n 'model=q7',\n 'model=q70',\n 'model=quattroporte',\n 'model=quest',\n 'model=qx',\n 'model=qx4',\n 'model=qx50',\n 'model=qx56',\n 'model=qx60',\n 'model=qx70',\n 'model=qx80',\n 'model=r-class',\n 'model=r32',\n 'model=r8',\n 'model=rabbit',\n 'model=raider',\n 'model=rainier',\n 'model=rally_wagon',\n 'model=ram_150',\n 'model=ram_250',\n 'model=ram_50_pickup',\n 'model=ram_cargo',\n 'model=ram_pickup_1500',\n 'model=ram_van',\n 'model=ram_wagon',\n 'model=ramcharger',\n 'model=range_rover',\n 'model=range_rover_evoque',\n 
'model=range_rover_sport',\n 'model=ranger',\n 'model=rapide',\n 'model=rapide_s',\n 'model=rav4',\n 'model=rav4_ev',\n 'model=rav4_hybrid',\n 'model=rc_200t',\n 'model=rc_300',\n 'model=rc_350',\n 'model=rc_f',\n 'model=rdx',\n 'model=reatta',\n 'model=regal',\n 'model=regency',\n 'model=rendezvous',\n 'model=reno',\n 'model=reventon',\n 'model=ridgeline',\n 'model=rio',\n 'model=riviera',\n 'model=rl',\n 'model=rlx',\n 'model=roadmaster',\n 'model=rogue',\n 'model=rogue_select',\n 'model=rondo',\n 'model=routan',\n 'model=rs_4',\n 'model=rs_5',\n 'model=rs_6',\n 'model=rs_7',\n 'model=rsx',\n 'model=rx-7',\n 'model=rx-8',\n 'model=rx_300',\n 'model=rx_330',\n 'model=rx_350',\n 'model=rx_400h',\n 'model=rx_450h',\n 'model=s-10',\n 'model=s-10_blazer',\n 'model=s-15',\n 'model=s-15_jimmy',\n 'model=s-class',\n 'model=s2000',\n 'model=s3',\n 'model=s4',\n 'model=s40',\n 'model=s5',\n 'model=s6',\n 'model=s60',\n 'model=s60_cross_country',\n 'model=s7',\n 'model=s70',\n 'model=s8',\n 'model=s80',\n 'model=s90',\n 'model=safari',\n 'model=safari_cargo',\n 'model=samurai',\n 'model=santa_fe',\n 'model=santa_fe_sport',\n 'model=savana',\n 'model=savana_cargo',\n 'model=sc_300',\n 'model=sc_400',\n 'model=sc_430',\n 'model=scoupe',\n 'model=sebring',\n 'model=sedona',\n 'model=sentra',\n 'model=sephia',\n 'model=sequoia',\n 'model=seville',\n 'model=shadow',\n 'model=shelby_gt350',\n 'model=shelby_gt500',\n 'model=sidekick',\n 'model=sienna',\n 'model=sierra_1500',\n 'model=sierra_1500_classic',\n 'model=sierra_1500_hybrid',\n 'model=sierra_1500hd',\n 'model=sierra_c3',\n 'model=sierra_classic_1500',\n 'model=sigma',\n 'model=silhouette',\n 'model=silver_seraph',\n 'model=silverado_1500',\n 'model=silverado_1500_classic',\n 'model=silverado_1500_hybrid',\n 'model=sixty_special',\n 'model=skylark',\n 'model=sl-class',\n 'model=slc-class',\n 'model=slk-class',\n 'model=slr_mclaren',\n 'model=sls_amg',\n 'model=sls_amg_gt',\n 'model=sls_amg_gt_final_edition',\n 
'model=slx',\n 'model=solstice',\n 'model=sonata',\n 'model=sonata_hybrid',\n 'model=sonic',\n 'model=sonoma',\n 'model=sorento',\n 'model=soul',\n 'model=soul_ev',\n 'model=spark',\n 'model=spark_ev',\n 'model=spectra',\n 'model=spirit',\n 'model=sportage',\n 'model=sportvan',\n 'model=spyder',\n 'model=sq5',\n 'model=srt_viper',\n 'model=srx',\n 'model=ss',\n 'model=ssr',\n 'model=stanza',\n 'model=stealth',\n 'model=stratus',\n 'model=sts',\n 'model=sts-v',\n 'model=suburban',\n 'model=sunbird',\n 'model=sundance',\n 'model=sunfire',\n 'model=superamerica',\n 'model=supersports_convertible_isr',\n 'model=supra',\n 'model=svx',\n 'model=swift',\n 'model=sx4',\n 'model=syclone',\n 'model=t100',\n 'model=tacoma',\n 'model=tahoe',\n 'model=tahoe_hybrid',\n 'model=tahoe_limited/z71',\n 'model=taurus',\n 'model=taurus_x',\n 'model=tc',\n 'model=tempo',\n 'model=tercel',\n 'model=terrain',\n 'model=terraza',\n 'model=thunderbird',\n 'model=tiburon',\n 'model=tiguan',\n 'model=titan',\n 'model=tl',\n 'model=tlx',\n 'model=toronado',\n 'model=torrent',\n 'model=touareg',\n 'model=touareg_2',\n 'model=town_and_country',\n 'model=town_car',\n 'model=tracker',\n 'model=trailblazer',\n 'model=trailblazer_ext',\n 'model=trans_sport',\n 'model=transit_connect',\n 'model=transit_wagon',\n 'model=traverse',\n 'model=trax',\n 'model=tribeca',\n 'model=tribute',\n 'model=tribute_hybrid',\n 'model=truck',\n 'model=tsx',\n 'model=tsx_sport_wagon',\n 'model=tt',\n 'model=tt_rs',\n 'model=tts',\n 'model=tucson',\n 'model=tundra',\n 'model=typhoon',\n 'model=uplander',\n 'model=v12_vanquish',\n 'model=v12_vantage',\n 'model=v12_vantage_s',\n 'model=v40',\n 'model=v50',\n 'model=v60',\n 'model=v60_cross_country',\n 'model=v70',\n 'model=v8',\n 'model=v8_vantage',\n 'model=v90',\n 'model=van',\n 'model=vanagon',\n 'model=vandura',\n 'model=vanquish',\n 'model=vanwagon',\n 'model=veloster',\n 'model=venture',\n 'model=venza',\n 'model=veracruz',\n ...]"
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dv.get_feature_names()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-06T08:40:04.763970500Z",
     "start_time": "2023-12-06T08:40:04.370916800Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 训练模型"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "测试集决定系数:96.299%\n"
     ]
    }
   ],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.linear_model import Ridge\n",
    "X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=1)\n",
    "ridge_reg = Ridge(random_state=1)\n",
    "ridge_reg.fit(X_train,y_train)\n",
    "print('测试集决定系数:{:.3f}%'.format(ridge_reg.score(X_test,y_test)*100))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-06T02:15:58.819917400Z",
     "start_time": "2023-12-06T02:15:58.509928400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "测试集决定系数:99.710%\n"
     ]
    }
   ],
   "source": [
    "from sklearn.ensemble import RandomForestRegressor\n",
    "rf_reg = RandomForestRegressor(random_state=1)\n",
    "rf_reg.fit(X_train,y_train)\n",
    "print('测试集决定系数:{:.3f}%'.format(rf_reg.score(X_train,y_train)*100))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-06T02:19:52.567555600Z",
     "start_time": "2023-12-06T02:19:04.464714400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[39527.04665952 29199.35023094 26014.34209956 37440.49725533\n",
      " 50189.21415126]\n"
     ]
    }
   ],
   "source": [
    "y_reg = rf_reg.predict(X_train[:5])\n",
    "print(np.expm1(y_reg))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-06T02:24:02.564328200Z",
     "start_time": "2023-12-06T02:24:02.506334Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[41418.70956625 25699.90002158 25434.22525411 38053.87324819\n",
      " 52516.00793851]\n"
     ]
    }
   ],
   "source": [
    "y_reg = ridge_reg.predict(X_train[:5])\n",
    "print(np.expm1(y_reg))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2023-12-06T02:25:42.989199700Z",
     "start_time": "2023-12-06T02:25:42.968862800Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    ">- 对于回归问题,我们可以先使用密度直方图来观察目标变量的分布形状;如果分布存在长尾,可以先对目标变量做对数变换(例如 `np.log1p`),使其更接近正态分布,预测后再用 `np.expm1` 把结果还原到原始尺度."
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
